In [1]:
import sklearn
import numpy as np
import pylab
import pickle
import pandas as pd
import glob
import os

We compute the confusion matrix using the final classifier object (not just the sklearn SVM), but without sound segmentation.


In [12]:
# Root directory of the labelled recordings. The glob below expects one level
# of sub-directories containing .wav files.
DATASET_DIR = '/home/lgeorge/Downloads/dataset'
# Recordings whose path contains one of these substrings are left out of the evaluation.
EXCLUDED_CLASSES = ['BlowNose', 'SmokeDetector', 'Laugh', 'RobotNoisePushed']

files = glob.glob(os.path.join(DATASET_DIR, '*', '*.wav'))
# The class name is the part of the file name that precedes the first '-'.
_class = [os.path.basename(f).split('-')[0] for f in files]

# list(...) hands the constructor a concrete sequence (zip is a lazy iterator on Python 3).
df = pd.DataFrame(list(zip(_class, files)), columns=['classname', 'filename'])

# A single alternation pattern matches any of the excluded substrings.
mask_to_remove = df.filename.str.contains('|'.join(EXCLUDED_CLASSES))
df = df[~mask_to_remove]

# Optional: also drop the files recorded on Romeo, which are sampled at 44100 Hz instead of 48000 Hz.
#df = df[~df.filename.str.contains('Romeo')]

print(df.classname.value_counts())
df.shape[0]


ToyGiraffe          200
Whistle             111
ClapHand            103
ToyPig               96
ToyMaracas           65
FakeSneeze           52
TacTac               51
DeskBell             50
ShutDoor             48
DoorBell01           36
HumanCaressHead      33
FireAlarmFr          32
DoorBell02           22
HumanScratchHead     21
RobotNoiseMoving     19
ApplauseLight        16
NoisePaper           13
dtype: int64
Out[12]:
968

In [34]:
%load_ext autoreload
%autoreload
from sound_classification.evaluate_classification import get_expected_predicted_stratified_fold

from sklearn.cross_validation import StratifiedKFold
n_folds = 3
stratified_fold = StratifiedKFold(df.classname, n_folds)  # we use only 3 fold.. as we have only 16 values on some data

folds = list(stratified_fold)
expected, predicted, labels, filenames = get_expected_predicted_stratified_fold(stratified_fold, df, calibrate_score=True, window_block=None)
# si on veut cropper 
#expected, predicted, labels, filenames = get_expected_predicted_stratified_fold(stratified_fold, df, calibrate_score=True, window_block=1.0, keep_first_slice_only=True)


predicted_class = [x.class_predicted for x in predicted]


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav
confidence threshold are {'RobotNoiseMoving': 0.1064249731289916, 'ShutDoor': 0.41000000000000003, 'FireAlarmFr': 0.1064249731289916, 'NoisePaper': 0.1064249731289916, 'ApplauseLight': 0.1064249731289916, 'ClapHand': 0.44, 'ToyPig': 0.69000000000000006, 'HumanScratchHead': 0.38, 'TacTac': 0.55000000000000004, 'HumanCaressHead': 0.41999999999999998, 'DoorBell01': 0.32000000000000001, 'Whistle': 0.14999999999999999, 'DoorBell02': 0.1064249731289916, 'DeskBell': 0.1064249731289916, 'ToyGiraffe': 0.68000000000000005, 'ToyMaracas': 0.1064249731289916, 'FakeSneeze': 0.31}
confidence coefficients are {'DoorBell01': 3.125, 'ShutDoor': 2.4390243902439024, 'FireAlarmFr': 9.396290838504205, 'NoisePaper': 9.396290838504205, 'ApplauseLight': 9.396290838504205, 'ClapHand': 2.272727272727273, 'ToyPig': 1.4492753623188404, 'HumanScratchHead': 2.6315789473684212, 'TacTac': 1.8181818181818181, 'HumanCaressHead': 2.380952380952381, 'RobotNoiseMoving': 9.396290838504205, 'Whistle': 6.666666666666667, 'DoorBell02': 9.396290838504205, 'DeskBell': 9.396290838504205, 'FakeSneeze': 3.2258064516129035, 'ToyMaracas': 9.396290838504205, 'ToyGiraffe': 1.4705882352941175}
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-023-Romeo.wav
confidence threshold are {'RobotNoiseMoving': 0.23999999999999999, 'ShutDoor': 0.46000000000000002, 'FireAlarmFr': 0.22, 'NoisePaper': 0.14849827575739388, 'ApplauseLight': 0.14849827575739388, 'ClapHand': 0.47999999999999998, 'ToyPig': 0.67000000000000004, 'HumanScratchHead': 0.58999999999999997, 'TacTac': 0.56000000000000005, 'HumanCaressHead': 0.42999999999999999, 'DoorBell01': 0.23999999999999999, 'Whistle': 0.23000000000000001, 'DoorBell02': 0.14849827575739388, 'DeskBell': 0.14849827575739388, 'ToyGiraffe': 0.69000000000000006, 'ToyMaracas': 0.14849827575739388, 'FakeSneeze': 0.20000000000000001}
confidence coefficients are {'DoorBell01': 4.166666666666667, 'ShutDoor': 2.1739130434782608, 'FireAlarmFr': 4.545454545454546, 'NoisePaper': 6.734084923879724, 'ApplauseLight': 6.734084923879724, 'ClapHand': 2.0833333333333335, 'ToyPig': 1.4925373134328357, 'HumanScratchHead': 1.6949152542372883, 'TacTac': 1.7857142857142856, 'HumanCaressHead': 2.3255813953488373, 'RobotNoiseMoving': 4.166666666666667, 'Whistle': 4.3478260869565215, 'DoorBell02': 6.734084923879724, 'DeskBell': 6.734084923879724, 'FakeSneeze': 5.0, 'ToyMaracas': 6.734084923879724, 'ToyGiraffe': 1.4492753623188404}
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
CLF is SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
warning file /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-025-Romeo.wav, wrong fs 44100, using it.. please remove the file if you don't want
Exception fs (44100) != self.fs (48000.0) detected on /home/lgeorge/Downloads/dataset/DeskBell/DeskBell-024-Romeo.wav
confidence threshold are {'RobotNoiseMoving': 0.19, 'ShutDoor': 0.59999999999999998, 'FireAlarmFr': 0.12525119272426644, 'NoisePaper': 0.12525119272426644, 'ApplauseLight': 0.12525119272426644, 'ClapHand': 0.32000000000000001, 'ToyPig': 0.62, 'HumanScratchHead': 0.62, 'TacTac': 0.57000000000000006, 'HumanCaressHead': 0.66000000000000003, 'DoorBell01': 0.33000000000000002, 'Whistle': 0.28000000000000003, 'DoorBell02': 0.12525119272426644, 'DeskBell': 0.12525119272426644, 'ToyGiraffe': 0.73999999999999999, 'ToyMaracas': 0.12525119272426644, 'FakeSneeze': 0.12525119272426644}
confidence coefficients are {'DoorBell01': 3.0303030303030303, 'ShutDoor': 1.6666666666666667, 'FireAlarmFr': 7.983955906922536, 'NoisePaper': 7.983955906922536, 'ApplauseLight': 7.983955906922536, 'ClapHand': 3.125, 'ToyPig': 1.6129032258064517, 'HumanScratchHead': 1.6129032258064517, 'TacTac': 1.7543859649122806, 'HumanCaressHead': 1.5151515151515151, 'RobotNoiseMoving': 5.2631578947368425, 'Whistle': 3.571428571428571, 'DoorBell02': 7.983955906922536, 'DeskBell': 7.983955906922536, 'FakeSneeze': 7.983955906922536, 'ToyMaracas': 7.983955906922536, 'ToyGiraffe': 1.3513513513513513}

In [35]:
%load_ext autoreload
%autoreload
%pylab notebook
#import seaborn as sns
#sns.reset_orig()
import sound_classification.evaluate_classification
sound_classification.evaluate_classification.print_report(expected, predicted_class, labels)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib
Out[35]:

In [137]:
# REORDERING COLUMNS..
%pylab inline
labels_bis = labels.copy()
labels_bis = ['ApplauseLight', 'ClapHand', 'DeskBell', 'DoorBell01', 'DoorBell02',
 'FakeSneeze', 'FireAlarmFr', 'NoisePaper', 'TacTac', 'ToyChicken', 'ToyGiraffe', 'ToyMaracas', 'ToyPig',
 'Whistle', 'HumanCaressHead', 'HumanScratchHead', 'VoiceAlex', 'VoiceLaurent']
labels_bis = np.array(labels_bis, dtype=np.object)
%load_ext autoreload
%autoreload
import seaborn as sns
sns.reset_orig()
import sound_classification.evaluate_classification
pylab.figure()

fig = sound_classification.evaluate_classification.print_report(expected, predicted_class, labels_bis)
fig.savefig('/tmp/final/conf_mat_withou_threshold.png', dpi=600)


Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[[ 16   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0  96   0   0   0   0   0   0   0   0   7   0   0   0   0   0   0   0]
 [  0   0  46   0   0   0   0   0   0   0   4   0   0   0   0   0   0   0]
 [  0   0   0  34   0   0   0   0   0   0   0   0   0   0   0   0   2   0]
 [  0   0   0   0  22   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0  50   0   0   0   0   0   0   0   0   0   0   2   0]
 [  0   0   0   0   0   0  32   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   4   0   0   0   0   0   0   9   0   0   0]
 [  0   0   0   0   0   0   0   0  50   0   1   0   0   0   0   0   0   0]
 [  0   1   0   0   0   0   0   0   0  81   0   0   0   0   1   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 200   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   4  61   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   1   0 191   0   0   0   0   0]
 [  0   1   0   0   0   0   0   0   0   0   2   0   0 108   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0  31   2   0   0]
 [  0   0   0   0   0   0   0   0   0   0   4   0   0   0   3  14   0   0]
 [  0   0   0   0   0   0   0   0   0   0   3   0   0   0   0   0 115   1]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  32   0]]
<matplotlib.figure.Figure at 0x7f399114c350>

In [72]:
# Scratch check comparing the hand-written label array with the one returned by
# the evaluation helper. Only the value of the last expression is displayed.
type(labels)
labels.shape
# builtin `object` instead of the removed `np.object` alias
np.array(labels_bis, dtype=object).dtype
labels.dtype


Out[72]:
dtype('O')

Filtering the predictions with a score threshold


In [136]:
prediction_df = pd.DataFrame([[x.confidence, x.score, x.class_predicted, x.timestamp_start, expected_val, filename] for x, expected_val, filename in zip(predicted, expected, filenames)], columns=['confidence', 'score', 'class_predicted', 'timestamp_start', 'expected', 'filename'])
mask_wrong = prediction_df.score < 0.9
#prediction_df[mask_wrong].class_predicted = 'UNKNOWN'
prediction_df.class_predicted[mask_wrong] = 'UNKNOWN'
%pylab notebook
new_labels = np.concatenate([labels_bis,  ['UNKNOWN']])
pylab.figure()
sound_classification.evaluate_classification.print_report(list(prediction_df.expected) + ['UNKNOWN'], list(prediction_df.class_predicted) + ['UNKNOWN'], new_labels  )
pylab.savefig('/tmp/final/conf_mat_with_threshold.png', dpi=600)


Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python2.7/dist-packages/IPython/kernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
[[ 16   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0]
 [  0  92   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   11]
 [  0   0  46   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0
    3]
 [  0   0   0  33   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    3]
 [  0   0   0   0  22   0   0   0   0   0   0   0   0   0   0   0   0   0
    0]
 [  0   0   0   0   0  50   0   0   0   0   0   0   0   0   0   0   0   0
    2]
 [  0   0   0   0   0   0  32   0   0   0   0   0   0   0   0   0   0   0
    0]
 [  0   0   0   0   0   0   0   4   0   0   0   0   0   0   3   0   0   0
    6]
 [  0   0   0   0   0   0   0   0  44   0   0   0   0   0   0   0   0   0
    7]
 [  0   0   0   0   0   0   0   0   0  81   0   0   0   0   0   0   0   0
    2]
 [  0   0   0   0   0   0   0   0   0   0 192   0   0   0   0   0   0   0
    8]
 [  0   0   0   0   0   0   0   0   0   0   0  61   0   0   0   0   0   0
    4]
 [  0   0   0   0   0   0   0   0   0   0   0   0 167   0   0   0   0   0
   25]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0 108   0   0   0   0
    3]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0  16   1   0   0
   16]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  14   0   0
    7]
 [  0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0  28   1
   89]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   4   0
   28]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    1]]

In [105]:
# Number of predictions currently labelled 'UNKNOWN'.
# (Counting the filter mask directly would be: mask_wrong.sum())
(prediction_df.class_predicted == 'UNKNOWN').sum()


Out[105]:
284